import pytesseract
from PIL import Image
import os
# Batch OCR script: recognise the text in every .jpg/.png image of one folder
# and append the results to a single text file stored in that same folder.
#
# The chi_sim (Simplified Chinese) language pack must be installed in
# Tesseract's language-data folder (tessdata).

# Path to the Tesseract executable.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Folder containing the images to process.
# Alternative input folder: r'D:\py\录像得到的图片'
image_folder = r'D:\py\去重最后图片\中医科养生2'

# The output file is named after the image folder itself; normpath drops any
# trailing separator so the base name is never empty.
file_name = os.path.basename(os.path.normpath(image_folder))
output_path = os.path.join(image_folder, f"{file_name}.txt")

# Collect the images up front. The extension check is case-insensitive so
# files such as "IMG.JPG" are not skipped, and the names are sorted because
# os.listdir() returns entries in arbitrary order, which would otherwise make
# the order of the recognised text unpredictable.
image_names = sorted(
    name for name in os.listdir(image_folder)
    if name.lower().endswith(('.jpg', '.png'))
)

if image_names:
    # Open the output once instead of once per image. UTF-8 is requested
    # explicitly: the platform default encoding may be unable to represent
    # some recognised characters and would raise UnicodeEncodeError.
    # Append mode is kept, so results accumulate across runs.
    with open(output_path, "a", encoding="utf-8") as file:
        for filename in image_names:
            image_path = os.path.join(image_folder, filename)

            # The context manager releases the image's file handle as soon as
            # OCR for this image is done.
            with Image.open(image_path) as img:
                # Recognise the text in the image with Tesseract.
                text = pytesseract.image_to_string(img, lang='chi_sim')

            file.write(text)
            file.write("\n")

print(f'文字已保存到文件夹{image_folder}下')
